﻿*----------------------------------------------------------------------------------------------------------	* 
* PROGRAMMED BY:	Linea Hasager																   			*
* DESCRIPTION:		Cleaning 1999-2003 language training from Danskuddannelsesdatabasen (DKDB)				*
* CREATED:			Nov. 20, 2019																		   	*
* LAST MODIFIED:	May 30, 2022													       					*
* Note:				Run program in SAS Enterprise Guide 7.1
*----------------------------------------------------------------------------------------------------------	;

*LIBNAMES;
libname pop "H:\Rawdata\707455\Grunddata";
libname raw "H:\Rawdata\707455\Views";
libname dkdb1999 "H:\rawdata\707455\DKDB\DKDB_VIVE\1999";
libname dkdb2000 "H:\rawdata\707455\DKDB\DKDB_VIVE\2000";
libname dkdb2001 "H:\rawdata\707455\DKDB\DKDB_VIVE\2001";
libname dkdb2002 "H:\rawdata\707455\DKDB\DKDB_VIVE\2002";
libname dkdb2003 "H:\rawdata\707455\DKDB\DKDB_VIVE\2003";


*MACROS FOR INPUT, OUTPUT AND PROGRAMS;;
%let input	=	L:\Workdata\707455\Papers\ABFHP_1999reform\Code\ReStat\Input;




*------------------------------------*
*		1) IMPORT AND CLEAN DATA
*------------------------------------;

*	1999;
*PARTICIPANTS;
data dkdb1999ku (drop=_:);
set dkdb1999.SIK_SILO_INDV_UV__99_KU_T
(rename=(pnr=_pnr kommunekode=_kommunekode instnr_=_instnr_ kommunenr_=_kommunenr katogori=_kategori skolekod=skolekode
skolegang=_skolegang udd=_udd uddsaar=_uddsaar uddvar=_uddvar teoprak=_teoprak erhvarb=_erhvarb));
year=1999;
pnr=_pnr+0;
kommunekode=_kommunekode+0;
instnr=_instnr_+0;
kommunenr=_kommunenr+0;
kategori=_kategori+0;
skolegang=_skolegang+0;
udd=_udd+0;
uddsaar=_uddsaar+0;
uddvar=_uddvar+0;
teoprak=_teoprak+0;
erhvarb=_erhvarb+0;
run;

*PARTICIPANTS CLASSES;
data dkdb1999kuho (drop=_:);
set dkdb1999.SIK_SILO_INDV_UV__99_KUHO_T
(rename=(INSTNR_=_instnr_ kommunenr_=_kommunenr));
instnr=_instnr_+0;
kommunenr=_kommunenr+0;
run;

*AGGREGATE CLASS INFO;
data dkdb1999ho (drop=_:);
set dkdb1999.SIK_SILO_INDV_UV__99_HO_T
(rename=(spor=_spor trin=_trin niveau=_niveau instnr_=_instnr_ timer_pr__uge=timer_pr_uge kommunenr_=_kommunenr afthold=_afthold
rekv=_rekv holdtype=_holdtype skolekode=_skolekode));
spor=_spor+0;
trin=_trin+0;
niveau=_niveau+0;
instnr=_instnr_+0;
kommunenr_ho=_kommunenr+0;
afthold=_afthold+0;
rekv=_rekv+0;
holdtype=_holdtype+0;
skolekode=_skolekode+0;
run;



*	2000;
*PARTICIPANTS;
data dkdb2000ku (drop=_:);
set dkdb2000.SIK_SILO_INDV_UV__00_KU_T
(rename=(pnr=_pnr kommunekode=_kommunekode instnr_=_instnr_ kommunenr_=_kommunenr katogori=_kategori skolekod=skolekode
skolegang=_skolegang udd=_udd uddsaar=_uddsaar uddvar=_uddvar teoprak=_teoprak erhvarb=_erhvarb));
year=2000;
pnr=_pnr+0;
kommunekode=_kommunekode+0;
instnr=_instnr_+0;
kommunenr=_kommunenr+0;
kategori=_kategori+0;
skolegang=_skolegang+0;
udd=_udd+0;
uddsaar=_uddsaar+0;
uddvar=_uddvar+0;
teoprak=_teoprak+0;
erhvarb=_erhvarb+0;
run;

*PARTICIPANTS CLASSES;
data dkdb2000kuho (drop=_:);
set dkdb2000.SIK_SILO_INDV_UV__00_KUHO_T
(rename=(INSTNR_=_instnr_ kommunenr_=_kommunenr));
instnr=_instnr_+0;
kommunenr=_kommunenr+0;
run;

*AGGREGATE CLASS INFO;
data dkdb2000ho (drop=_:);
set dkdb2000.SIK_SILO_INDV_UV__00_HO_T
(rename=(spor=_spor trin=_trin niveau=_niveau instnr_=_instnr_ timer_pr__uge=timer_pr_uge afthold=_afthold rekv=_rekv holdtype=_holdtype
skolekode=_skolekode));
spor=_spor+0;
trin=_trin+0;
niveau=_niveau+0;
instnr=_instnr_+0;
afthold=_afthold+0;
rekv=_rekv+0;
holdtype=_holdtype+0;
skolekode=_skolekode+0;
run;

*	2001-2003;
%macro year;
*PARTICIPANTS;
%do i=2001 %to 2003;
data dkdb&i.ku (drop=_:);
set dkdb&i..KURS&i.TOTAL
(rename=(pnr=_pnr komkode=kommunekode natkode=nation sprgkode=sprogkode skolegan=skolegang ftimer=f_timer ttimer=t_timer skolekod=skolekode));
year=&i.;
pnr=_pnr+0;
run;

*PARTICIPANTS CLASSES;
data dkdb&i.kuho (drop=_:);
set dkdb&i..KURSHOLD&i.TOTAL
(rename=(pnr=_pnr skolekod=skolekode ttimer=t_timer ftimer=f_timer startdat=start_dato slutdato=slut_dato));
pnr=_pnr+0;
run;


*AGGREGATE CLASS INFO;
data dkdb&i.ho (drop=_:);
set dkdb&i..HOLD&i.TOTAL
(rename=(skolekod=skolekode tpruge=timer_pr_uge ttimer=t_timer ftimer=f_timer startdat=start_dato slutdato=slut_dato));
run;
%end;
%mend;
%year;


*GENERATE UNIQUE CLASS ID (HOLDID) FOR 2001-2003;
*2001;
data dkdb2001ho;
set dkdb2001ho;
holdid+1;
run;

*STORE MAXIMUM ID IN MACRO TO CREATE IDS FOR THE NEXT YEAR THAT ARE DISTINCT FROM THE YEAR BEFORE;
*2002;
proc sql noprint; select max(holdid) into: max2001 from dkdb2001ho; quit;

data dkdb2002ho (drop=_:);
set dkdb2002ho;
__holdid=&max2001.;
_holdid+1;
holdid=_holdid+&max2001.;
run;

*2003;
proc sql noprint; select max(holdid) into: max2002 from dkdb2002ho; quit;

data dkdb2003ho (drop=_:);
set dkdb2003ho;
__holdid=&max2002.;
_holdid+1;
holdid=_holdid+&max2002.;
run;



*SORT DATA;
*1999-2000;
%macro yearsort1;
%do i=1999 %to 2000;
proc sort data=dkdb&i.ku (drop=instnr) nodupkey;	by cprid; run;
proc sort data=dkdb&i.kuho; 						by cprid; run;
proc sort data=dkdb&i.ho nodupkey; 					by holdkode instnr; run;
%end;
%mend;
%yearsort1;

*2001;
proc sort data=dkdb2001ku	(rename=instkode=instnr) nodupkey;	by pnr; run;
proc sort data=dkdb2001kuho	(rename=instkode=instnr); 			by pnr; run;
proc sort data=dkdb2001ho 	(rename=instkode=instnr) nodupkey;	by holdkode instnr; run;

*2002-2003;
%macro yearsort2;
%do i=2002 %to 2003;
proc sort data=dkdb&i.ku nodupkey;	by pnr; run;
proc sort data=dkdb&i.kuho;			by pnr; run;
proc sort data=dkdb&i.ho nodupkey;	by holdkode ckkode; run;
%end;
%mend;
%yearsort2;


*MERGE DATA;
*1999-2000;
%macro combine1;
%do year=1999 %to 2000;

*ADD ALL CLASS INFO TO INDIVIDUALS;
data dkdb&year._population;
merge dkdb&year.ku (in=a)
dkdb&year.kuho
(rename=(skolekode=skolekode_kuho kategori=kategori_kuho f_timer=f_timer_kuho t_timer=t_timer_kuho kommunenr=kommunenr_kuho));
by cprid;
if a;
run;

proc sort data = dkdb&year._population; by holdkode instnr; run;

*ADD AGGREGATE CLASS INFO TO INDIVIDUALS CLASSES;
data dkdb&year._population2
(drop=kon cprid skolekode f_timer t_timer spor--niveau skolekode_kuho kategori_kuho skolekode_ho f_timer_ho t_timer_ho);
merge dkdb&year._population (in=a)
dkdb&year.ho (rename=(skolekode=skolekode_ho timer_pr_uge=timer_pr_uge_ho start_dato=start_dato_ho slut_dato=slut_dato_ho
spor=spor_ho niveau=niveau_ho trin=trin_ho f_timer=f_timer_ho t_timer=t_timer_ho));
by holdkode instnr;
if a;
run;

proc sort data=dkdb&year._population2 nodup; by pnr start_dato; run;

%end;
%mend;
%combine1;


*2001;
%macro combine2;
%do year=2001 %to 2001;

*ADD ALL CLASS INFO TO INDIVIDUALS;
data dkdb&year._population;
merge dkdb&year.ku (in=a drop=instnr)
dkdb&year.kuho
(rename=(skolekode=skolekode_kuho kategori=kategori_kuho f_timer=f_timer_kuho t_timer=t_timer_kuho));
by pnr;
if a;
run;

proc sort data = dkdb&year._population; by holdkode instnr; run;

*ADD AGGREGATE CLASS INFO TO INDIVIDUALS CLASSES;
data dkdb&year._population2
(drop=kon cprid skolekode spor--niveau f_timer t_timer skolekode_kuho kategori_kuho skolekode_ho f_timer_ho t_timer_ho);
merge dkdb&year._population (in=a)
dkdb&year.ho (rename=(skolekode=skolekode_ho timer_pr_uge=timer_pr_uge_ho start_dato=start_dato_ho slut_dato=slut_dato_ho
spor=spor_ho niveau=niveau_ho trin=trin_ho f_timer=f_timer_ho t_timer=t_timer_ho));
by holdkode instnr;
if a;
run;

proc sort data=dkdb&year._population2 nodup; by pnr start_dato; run;

%end;
%mend;
%combine2;


*2002-2003;
%macro combine3;
%do year=2002 %to 2003;

*ADD ALL CLASS INFO TO INDIVIDUALS;
data dkdb&year._population;
merge dkdb&year.ku (in=a)
dkdb&year.kuho
(rename=(skolekode=skolekode_kuho kategori=kategori_kuho f_timer=f_timer_kuho t_timer=t_timer_kuho));
by pnr;
if a;
run;

proc sort data = dkdb&year._population; by holdkode ckkode; run;

*ADD AGGREGATE CLASS INFO TO INDIVIDUALS CLASSES;
data dkdb&year._population2
(drop=kon skolekode f_timer t_timer spor--niveau skolekode_kuho kategori_kuho skolekode_ho f_timer_ho t_timer_ho);
merge dkdb&year._population (in=a)
dkdb&year.ho
(rename=(skolekode=skolekode_ho timer_pr_uge=timer_pr_uge_ho start_dato=start_dato_ho slut_dato=slut_dato_ho
spor=spor_ho niveau=niveau_ho trin=trin_ho f_timer=f_timer_ho t_timer=t_timer_ho));
by holdkode ckkode;
if a;
run;

proc sort data=dkdb&year._population2 nodup; by pnr start_dato; run;

%end;
%mend;
%combine3;


*COLLECT DATA AND CREATE LABELS;
data dkdb (drop=_: keep=pnr year start_dato nation sprogkode t_timer_kuho holdid);
retain pnr year start_dato nation sprogkode ;

set dkdb1999_population2
	(rename=( start_dato=_start_dato slut_dato=_slut_dato start_dato_ho=_start_dato_ho slut_dato_ho=_slut_dato_ho henvdato=_henvdato
	modtdato=_modtdato))
	dkdb2000_population2
	(rename=( start_dato=_start_dato slut_dato=_slut_dato start_dato_ho=_start_dato_ho slut_dato_ho=_slut_dato_ho henvdato=_henvdato
	modtdato=_modtdato))
	dkdb2001_population2
	(rename=( start_dato=_start_dato slut_dato=_slut_dato start_dato_ho=_start_dato_ho slut_dato_ho=_slut_dato_ho henvdato=_henvdato
	modtdato=_modtdato))
	dkdb2002_population2
	(rename=( start_dato=_start_dato slut_dato=_slut_dato start_dato_ho=_start_dato_ho slut_dato_ho=_slut_dato_ho))
	dkdb2003_population2
	(rename=( start_dato=_start_dato slut_dato=_slut_dato start_dato_ho=_start_dato_ho slut_dato_ho=_slut_dato_ho));



if sprogkode="GRÆ" then sprogkode="GRA";
if sprogkode="GRØ" then sprogkode="GRO";
if sprogkode="GR´" then sprogkode="GRA";
if sprogkode="GR`" then sprogkode="GRA";
if sprogkode="GR’" then sprogkode="GRA";
if sprogkode="GR‘" then sprogkode="GRA";
if sprogkode="ØST" then sprogkode="OST";


*CLEAN DATE VARIABLES FOR DATA FROM 2001-2003;
if year in(1999:2000) then do;
%macro rename(var=);
&var.= datepart(_&var.);
format &var. date9.;
%mend;
%rename(var=start_dato);
end;

if year in(2001:2003) then do;
_temp='01jan1960:04:09:36'dt;
_tempstart='01jan2001'd;
format _tempstart date9. _temp datetime.;
%macro cleandate(var=);
_days_&var.=_&var.-_temp;
&var.=intnx('day',_tempstart,_days_&var.);
format &var. date9.;
%mend;
%cleandate(var=start_dato);
end;

label year="Register Source Year" nation ="Nationality" sprogkode="Language" 
start_dato="Individual Start Date Class" t_timer_kuho="Individual Hours Assigned" holdid="Class ID";
run;



*------------------------------------*
*		2) EXPORT TO STATA
*------------------------------------;

proc sort data=dkdb; by pnr year start_dato; run;

proc export data=dkdb dbms=dta outfile="&input.\_dkdb1999_2003" replace; run;
